package com.clark.crawler.novel.processor;

import com.clark.crawler.novel.bean.Chapter;
import com.clark.crawler.novel.NovelUtil;
import com.clark.crawler.novel.property.NovelProperties;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.processor.PageProcessor;

/**
 * Page processor that turns a downloaded chapter page into a {@link Chapter}.
 * <p>
 * The chapter title and content are selected from the page HTML using the
 * XPath expressions stored in {@code NovelProperties.chapterTitleReg} and
 * {@code NovelProperties.chapterContentReg}. The resulting {@link Chapter},
 * together with the page URL, is published under the {@code "chapter"}
 * result field.
 *
 * @author code4crafter@gmail.com <br>
 * @since 0.5.1
 */
public class ChapterPageProcessor implements PageProcessor {

    /** Site settings returned by {@link #getSite()}: retry times 3, sleep time 0. */
    private final Site site = Site.me().setRetryTimes(3).setSleepTime(0);

    /**
     * Extracts the title and content of the chapter from {@code page} and
     * stores them, along with the page URL, in the {@code "chapter"} field.
     *
     * @param page the downloaded page to extract the chapter from
     */
    @Override
    public void process(Page page) {
        // Selections run in the same order as before: title first, then content.
        final String chapterTitle = selectByXpath(page, NovelProperties.chapterTitleReg);
        final String chapterBody = selectByXpath(page, NovelProperties.chapterContentReg);

        final Chapter result = new Chapter();
        result.setContent(chapterBody);
        result.setTitle(chapterTitle);
        result.setUrl(page.getUrl().toString());

        page.putField("chapter", result);
    }

    /**
     * Returns the site settings this processor was created with.
     *
     * @return the {@link Site} configuration of this processor
     */
    @Override
    public Site getSite() {
        return site;
    }

    /**
     * Applies an XPath expression to the page HTML and returns the selection
     * as a string.
     *
     * @param page       the page whose HTML is queried
     * @param expression the XPath expression to evaluate
     * @return the string form of the selection, exactly as produced by the selector
     */
    private static String selectByXpath(Page page, String expression) {
        return page.getHtml().xpath(expression).toString();
    }

}
